# -*- coding: utf-8 -*-
from app_thread import *

def get_page(redis_obj, cate_url):
    """Crawl one mixkit.co category and store each new video item in Redis.

    Walks every paginated listing page of ``cate_url``, and for each item
    not yet present in Redis, scrapes the listing card, then fetches the
    detail page to extract the tag list and the direct video URL.  The
    assembled record is stored as JSON under the key ``'mixkit<id>'``.

    Args:
        redis_obj: Redis client used both for de-duplication (``exists``)
            and for storage (``set``).
        cate_url: Absolute URL of the category listing page.
    """
    cate_page = requests.get(cate_url).content
    cate_content = BeautifulSoup(cate_page, "html.parser")
    # BUG FIX: select() always returns a list (never None), so the old
    # `== None` check could never fire, and an empty result crashed on
    # the [-1] index below.  Treat "no pagination links" as one page.
    page_links = cate_content.select('.pagination__link')
    if not page_links:
        max_page = 1
    else:
        # The last pagination link carries the highest page number.
        max_page = page_links[-1].get_text()
    for page_no in range(1, int(max_page) + 1):
        list_url = cate_url + '?page=' + str(page_no)
        print(list_url)
        list_page = requests.get(list_url).content
        list_content = BeautifulSoup(list_page, "html.parser")
        # Iterating an empty list is a no-op, so no explicit len() guard.
        for ele in list_content.select('.item-grid-item'):
            try:
                url = 'https://mixkit.co' + ele.select('.item-grid-card__title a')[0].get('href')
                # The numeric id is the trailing segment of the URL slug.
                item_id = url.replace('/', '').split('-')[-1]
                redis_key = 'mixkit' + str(item_id)
                if redis_obj.exists(redis_key) == 0:
                    # Fresh dict per item so a partially-filled record from
                    # a failed iteration can never leak into the next one.
                    data = {}
                    data['id'] = item_id
                    data['cover'] = ele.select('.item-grid-video-player__thumb-wrapper img')[0].get('src')
                    # NOTE: 'small_videl' key typo is preserved on purpose —
                    # it is part of the stored payload that consumers read.
                    data['small_videl'] = ele.select('.item-grid-video-player__video-wrapper video')[0].get('src')
                    data['url'] = url
                    data['detail'] = ele.select('.item-grid-card__title a')[0].get_text()
                    data['description'] = ele.select('.item-grid-card__description')[0].get_text() if len(ele.select('.item-grid-card__description')) > 0 else ''
                    # Fetch the detail page to extract tags and the
                    # downloadable video URL.
                    detail_page = requests.get(url).content
                    detail_content = BeautifulSoup(detail_page, "html.parser")
                    tag_arr = []
                    for tag in detail_content.select('.meta-links__link'):
                        tag_val = tag.get_text()
                        if tag_val != '':
                            tag_arr.append(tag_val)
                    data['tag_list'] = tag_arr
                    # BUG FIX: the original split str(detail_content.get_text)
                    # — the repr of a bound method — which only embedded the
                    # HTML by accident.  Serialize the document explicitly and
                    # pull contentUrl out of the embedded JSON-LD blob.
                    video_url = ''
                    first_str = str(detail_content).split('contentUrl":"')
                    if len(first_str) > 1:
                        two_str = first_str[1].split('","embedUrl"')
                        if len(two_str) > 1:
                            video_url = two_str[0]
                    data['video_url'] = video_url
                    print(data)
                    a = redis_obj.set(redis_key, json.dumps(data))
                    print(a)
            except Exception as e:
                # Best-effort crawl: log the offending element plus the
                # exception location, then keep going with the next item.
                log(str(ele))
                e_type, e_value, e_traceback = sys.exc_info()
                log("self send msg daily----start---\n" + "type ==> %s" % (
                e_type.__name__) + "\ntraceback ==> file name: %s" % (
                    e_traceback.tb_frame.f_code.co_filename) + "\ntraceback ==> line no: %s" % (
                    e_traceback.tb_lineno) + "\ntraceback ==> function name: %s" % (
                    e_traceback.tb_frame.f_code.co_name) + "\nself send msg daily----end---")
                continue

if __name__ == '__main__':
    print('开始时间：')
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))

    # Fetch the landing page once to discover every category link in the
    # global navigation bar.
    page = requests.get('https://mixkit.co/free-stock-video/').content
    content = BeautifulSoup(page, "html.parser")

    # Redis DB 1 holds the de-duplication keys and the scraped records.
    redis_obj = get_redis(1)

    nav_links = content.select('.global-navigation__link')
    if nav_links:
        cate_url_list = []
        for link in nav_links:
            path = link.get('href')
            # Skip placeholder anchors ('#') and links without an href.
            # (was `path != None` — identity comparison belongs to `is`)
            if path is not None and path != '#':
                cate_url_list.append({
                    'redis_obj': redis_obj,
                    'cate_url': 'https://mixkit.co' + path,
                })

        # One worker thread per category; each invocation receives its
        # dict entry as keyword arguments for get_page.
        th_obj = MultiThreadTask(func=get_page, thread_num=len(cate_url_list), params_lst=cate_url_list)
        th_obj.start()

    print('结束时间：')
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
    exit()



